{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "hw3_CNN.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/Iallen520/lhy_DL_Hw/blob/master/hw3_CNN.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "D_a2USyd4giE",
        "colab_type": "text"
      },
      "source": [
        "# **Homework 3 - Convolutional Neural Network**\n",
        "\n",
        "若有任何問題，歡迎來信至助教信箱 ntu-ml-2020spring-ta@googlegroups.com"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "zhzdomRTOKoJ",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "!gdown --id '19CzXudqN58R3D-1G8KeFWk8UDQwlb8is' --output food-11.zip # 下載資料集\n",
        "!unzip food-11.zip # 解壓縮"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "9sVrKci4PUFW",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Import需要的套件\n",
        "import os\n",
        "import numpy as np\n",
        "import cv2\n",
        "import torch\n",
        "import torch.nn as nn\n",
        "import torchvision.transforms as transforms\n",
        "import pandas as pd\n",
        "from torch.utils.data import DataLoader, Dataset\n",
        "import time"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "F0i9ZCPrOVN_",
        "colab_type": "text"
      },
      "source": [
        "#Read image\n",
        "利用 OpenCV (cv2) 讀入照片並存放在 numpy array 中"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Zf7QPifJQNUK",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "def readfile(path, label):\n",
        "    # label 是一個 boolean variable，代表需不需要回傳 y 值\n",
        "    image_dir = sorted(os.listdir(path))\n",
        "    x = np.zeros((len(image_dir), 128, 128, 3), dtype=np.uint8)\n",
        "    y = np.zeros((len(image_dir)), dtype=np.uint8)\n",
        "    for i, file in enumerate(image_dir):\n",
        "        img = cv2.imread(os.path.join(path, file))\n",
        "        x[i, :, :] = cv2.resize(img,(128, 128))\n",
        "        if label:\n",
        "          y[i] = int(file.split(\"_\")[0])\n",
        "    if label:\n",
        "      return x, y\n",
        "    else:\n",
        "      return x"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "5ebVIY5HQQH7",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "#分別將 training set、validation set、testing set 用 readfile 函式讀進來\n",
        "workspace_dir = './food-11'\n",
        "print(\"Reading data\")\n",
        "train_x, train_y = readfile(os.path.join(workspace_dir, \"training\"), True)\n",
        "print(\"Size of training data = {}\".format(len(train_x)))\n",
        "val_x, val_y = readfile(os.path.join(workspace_dir, \"validation\"), True)\n",
        "print(\"Size of validation data = {}\".format(len(val_x)))\n",
        "test_x = readfile(os.path.join(workspace_dir, \"testing\"), False)\n",
        "print(\"Size of Testing data = {}\".format(len(test_x)))"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "gq5KVMM3OHY6",
        "colab_type": "text"
      },
      "source": [
        "# Dataset\n",
        "在 Pytorch 中，我們可以利用 torch.utils.data 的 Dataset 及 DataLoader 來\"包裝\" data，使後續的 training 及 testing 更為方便。\n",
        "\n",
        "Dataset 需要 overload 兩個函數：\\_\\_len\\_\\_ 及 \\_\\_getitem\\_\\_\n",
        "\n",
        "\\_\\_len\\_\\_ 必須要回傳 dataset 的大小，而 \\_\\_getitem\\_\\_ 則定義了當程式利用 [ ] 取值時，dataset 應該要怎麼回傳資料。\n",
        "\n",
        "實際上我們並不會直接使用到這兩個函數，但是使用 DataLoader 在 enumerate Dataset 時會使用到，沒有實做的話會在程式運行階段出現 error。\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "gKd2abixQghI",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "#training 時做 data augmentation\n",
        "train_transform = transforms.Compose([\n",
        "    transforms.ToPILImage(),\n",
        "    transforms.RandomHorizontalFlip(), #隨機將圖片水平翻轉\n",
        "    transforms.RandomRotation(15), #隨機旋轉圖片\n",
        "    transforms.ToTensor(), #將圖片轉成 Tensor，並把數值normalize到[0,1](data normalization)\n",
        "])\n",
        "#testing 時不需做 data augmentation\n",
        "test_transform = transforms.Compose([\n",
        "    transforms.ToPILImage(),                                    \n",
        "    transforms.ToTensor(),\n",
        "])\n",
        "class ImgDataset(Dataset):\n",
        "    def __init__(self, x, y=None, transform=None):\n",
        "        self.x = x\n",
        "        # label is required to be a LongTensor\n",
        "        self.y = y\n",
        "        if y is not None:\n",
        "            self.y = torch.LongTensor(y)\n",
        "        self.transform = transform\n",
        "    def __len__(self):\n",
        "        return len(self.x)\n",
        "    def __getitem__(self, index):\n",
        "        X = self.x[index]\n",
        "        if self.transform is not None:\n",
        "            X = self.transform(X)\n",
        "        if self.y is not None:\n",
        "            Y = self.y[index]\n",
        "            return X, Y\n",
        "        else:\n",
        "            return X"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "qz6jeMnkQl0_",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "batch_size = 128\n",
        "train_set = ImgDataset(train_x, train_y, train_transform)\n",
        "val_set = ImgDataset(val_x, val_y, test_transform)\n",
        "train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)\n",
        "val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "j9YhZo7POPYG",
        "colab_type": "text"
      },
      "source": [
        "# Model"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Y1c-GwrMQqMl",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "class Classifier(nn.Module):\n",
        "    def __init__(self):\n",
        "        super(Classifier, self).__init__()\n",
        "        #torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)\n",
        "        #torch.nn.MaxPool2d(kernel_size, stride, padding)\n",
        "        #input 維度 [3, 128, 128]\n",
        "        self.cnn = nn.Sequential(\n",
        "            nn.Conv2d(3, 64, 3, 1, 1),  # [64, 128, 128]\n",
        "            nn.BatchNorm2d(64),\n",
        "            nn.ReLU(),\n",
        "            nn.MaxPool2d(2, 2, 0),      # [64, 64, 64]\n",
        "\n",
        "            nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]\n",
        "            nn.BatchNorm2d(128),\n",
        "            nn.ReLU(),\n",
        "            nn.MaxPool2d(2, 2, 0),      # [128, 32, 32]\n",
        "\n",
        "            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]\n",
        "            nn.BatchNorm2d(256),\n",
        "            nn.ReLU(),\n",
        "            nn.MaxPool2d(2, 2, 0),      # [256, 16, 16]\n",
        "\n",
        "            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]\n",
        "            nn.BatchNorm2d(512),\n",
        "            nn.ReLU(),\n",
        "            nn.MaxPool2d(2, 2, 0),       # [512, 8, 8]\n",
        "            \n",
        "            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]\n",
        "            nn.BatchNorm2d(512),\n",
        "            nn.ReLU(),\n",
        "            nn.MaxPool2d(2, 2, 0),       # [512, 4, 4]\n",
        "        )\n",
        "        self.fc = nn.Sequential(\n",
        "            nn.Linear(512*4*4, 1024),\n",
        "            nn.ReLU(),\n",
        "            nn.Linear(1024, 512),\n",
        "            nn.ReLU(),\n",
        "            nn.Linear(512, 11)\n",
        "        )\n",
        "\n",
        "    def forward(self, x):\n",
        "        out = self.cnn(x)\n",
        "        out = out.view(out.size()[0], -1)\n",
        "        return self.fc(out)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "aEnGbriXORN3",
        "colab_type": "text"
      },
      "source": [
        "# Training"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "_5x-FH2Kr_jh",
        "colab_type": "text"
      },
      "source": [
        "使用training set訓練，並使用validation set尋找好的參數"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "PHaFE-8oQtkC",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "model = Classifier().cuda()\n",
        "loss = nn.CrossEntropyLoss() # 因為是 classification task，所以 loss 使用 CrossEntropyLoss\n",
        "optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # optimizer 使用 Adam\n",
        "num_epoch = 30\n",
        "\n",
        "for epoch in range(num_epoch):\n",
        "    epoch_start_time = time.time()\n",
        "    train_acc = 0.0\n",
        "    train_loss = 0.0\n",
        "    val_acc = 0.0\n",
        "    val_loss = 0.0\n",
        "\n",
        "    model.train() # 確保 model 是在 train model (開啟 Dropout 等...)\n",
        "    for i, data in enumerate(train_loader):\n",
        "        optimizer.zero_grad() # 用 optimizer 將 model 參數的 gradient 歸零\n",
        "        train_pred = model(data[0].cuda()) # 利用 model 得到預測的機率分佈 這邊實際上就是去呼叫 model 的 forward 函數\n",
        "        batch_loss = loss(train_pred, data[1].cuda()) # 計算 loss （注意 prediction 跟 label 必須同時在 CPU 或是 GPU 上）\n",
        "        batch_loss.backward() # 利用 back propagation 算出每個參數的 gradient\n",
        "        optimizer.step() # 以 optimizer 用 gradient 更新參數值\n",
        "\n",
        "        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())\n",
        "        train_loss += batch_loss.item()\n",
        "    \n",
        "    model.eval()\n",
        "    with torch.no_grad():\n",
        "        for i, data in enumerate(val_loader):\n",
        "            val_pred = model(data[0].cuda())\n",
        "            batch_loss = loss(val_pred, data[1].cuda())\n",
        "\n",
        "            val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())\n",
        "            val_loss += batch_loss.item()\n",
        "\n",
        "        #將結果 print 出來\n",
        "        print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % \\\n",
        "            (epoch + 1, num_epoch, time.time()-epoch_start_time, \\\n",
        "             train_acc/train_set.__len__(), train_loss/train_set.__len__(), val_acc/val_set.__len__(), val_loss/val_set.__len__()))"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "2-ssSxXlsI_T",
        "colab_type": "text"
      },
      "source": [
        "得到好的參數後，我們使用training set和validation set共同訓練（資料量變多，模型效果較好）"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "RKoUxLun8lFG",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "train_val_x = np.concatenate((train_x, val_x), axis=0)\n",
        "train_val_y = np.concatenate((train_y, val_y), axis=0)\n",
        "train_val_set = ImgDataset(train_val_x, train_val_y, train_transform)\n",
        "train_val_loader = DataLoader(train_val_set, batch_size=batch_size, shuffle=True)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "OoAS5TtRsfOo",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "model_best = Classifier().cuda()\n",
        "loss = nn.CrossEntropyLoss() # 因為是 classification task，所以 loss 使用 CrossEntropyLoss\n",
        "optimizer = torch.optim.Adam(model_best.parameters(), lr=0.001) # optimizer 使用 Adam\n",
        "num_epoch = 30\n",
        "\n",
        "for epoch in range(num_epoch):\n",
        "    epoch_start_time = time.time()\n",
        "    train_acc = 0.0\n",
        "    train_loss = 0.0\n",
        "\n",
        "    model_best.train()\n",
        "    for i, data in enumerate(train_val_loader):\n",
        "        optimizer.zero_grad()\n",
        "        train_pred = model_best(data[0].cuda())\n",
        "        batch_loss = loss(train_pred, data[1].cuda())\n",
        "        batch_loss.backward()\n",
        "        optimizer.step()\n",
        "\n",
        "        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())\n",
        "        train_loss += batch_loss.item()\n",
        "\n",
        "        #將結果 print 出來\n",
        "    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f' % \\\n",
        "      (epoch + 1, num_epoch, time.time()-epoch_start_time, \\\n",
        "      train_acc/train_val_set.__len__(), train_loss/train_val_set.__len__()))"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "2o1oCMXy61_3",
        "colab_type": "text"
      },
      "source": [
        "# Testing\n",
        "利用剛剛 train 好的 model 進行 prediction"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "iAR6sn8U661G",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "test_set = ImgDataset(test_x, transform=test_transform)\n",
        "test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "4HznI9_-ocrq",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "model_best.eval()\n",
        "prediction = []\n",
        "with torch.no_grad():\n",
        "    for i, data in enumerate(test_loader):\n",
        "        test_pred = model_best(data.cuda())\n",
        "        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)\n",
        "        for y in test_label:\n",
        "            prediction.append(y)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "3t2q2Th85ZUE",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "#將結果寫入 csv 檔\n",
        "with open(\"predict.csv\", 'w') as f:\n",
        "    f.write('Id,Category\\n')\n",
        "    for i, y in  enumerate(prediction):\n",
        "        f.write('{},{}\\n'.format(i, y))"
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}